##
## This file implements a number of model-agnostic variance estimators
## for generalized linear models fitted to time-series and longitudinal data
##
## It includes functions for clustered data
##  	GEE sandwich estimator		infjack.glm()
## 	1-step cluster jackknife	jack.glm()
##	sandwich estimator for 
##	crossed clustering		xeffect.glm()
##  and time-series data
## 	Newey-West estimator		newey.west.glm()
##	Extended kernel estimator	kernelvar.glm()
##	General weighted sandwich	weightvar.glm()
##	1-step (Lele) jackknife		lele.glm()
##	WEAVE adaptive lag estimate 	weave.trunc()
##	WEAVE smooth adaptive estimate 	weave.smooth()
##
## and some other functions called by these ones. These functions will
## work on large data sets as they do not require construction of large
## matrices. They are entirely in interpreted code.
##
## (c) 1997 Thomas Lumley (thomas@biostat.washington.edu)
## Distributed under the terms of the GNU General Public License, version 2.
## see file COPYING for details.
##
## This is free software, there is no warranty whatsoever.
##
#### Documentation
##
##   jack.glm(glm.object,groups), infjack.glm(glm.object,groups)
##     Variance estimates for independence working models fitted to 
##     clustered data. The first is based on the one-step jackknife, 
##     the second is the GEE sandwich estimator based on the infinitesimal
##     jackknife.
##
##   newey.west.glm(glm.object,times,lag,clag)
##     Newey-West estimator for estimating equations fitted to time series
##     data. The truncation lag (lag=) optimally increases as the cube root
##     of sample size, you can specify the multiplier (clag=) instead of lag=.
##   Reference: 
##   Newey W , West K (1987). "A simple, positive definite, heteroskedasticity
##     and autocorrelation consistent covariance matrix."
##     Econometrica 55:703--708
##   Andrews.(1991) "Heteroskedasticity and Autocorrelation Consistent 
##     Covariance Matrix Estimation" Econometrica  59:817--858
##
##   kernelvar.glm<-function(glm.obj,times, lag=NULL,clag=NULL, 
##                      kernel=c("tukey","bartlett","parzen"))
##     Estimator based on specified weight function: "bartlett" gives
##     Newey-West estimator, other two are better. If clag= is specified the
##     truncation lag is clag*n^a where a=1/3 for "bartlett", 1/5 for the 
##     other kernels.
##     Reference:
##     Andrews.(1991) "Heteroskedasticity and Autocorrelation Consistent 
##     Covariance Matrix Estimation" Econometrica  59:817--858
##
##   weightvar.glm<-function(glm.obj,times,weights)
##	Roll your own. Specify a vector of weights for lags 0:M 
##
##
##   weave.trunc(glm.obj,times, lag=NULL, ctrunc=4)
##     weighted empirical adaptive variance estimate with weights
##     (n*rho^2>ctrunc)  or with weights 1 up to lag=lag, 0 further away
##   weave.smooth(glm.obj,times,csmooth=1)
##     weighted empirical adaptive variance estimate with weights
##     min(1,csmooth*N*rho^2)
##     Reference:  Thomas Lumley 
##
##   lele.glm(glm.obj,times,lag)
##     One-step version of Lele's estimating equation jackknife.
##    Reference: Lele (1991) "Jackknifing Linear Estimating Equations: 
##    Asymptotic Theory and Applications in Stochastic Processes" 
##    JRSS B 53:253-267
####


weave.trunc<-function(glm.obj,times, lag=NULL, ctrunc=4){
	index<-order(times)
	res<-residuals(glm.obj,"response")[index]
	n<-length(res)
	umat<-estfun.glm(glm.obj)[index,,drop=F]
	utu<-0.5*t(umat)%*%umat
	if (is.null(lag)){
		rhohat<-isocorrs(res)
		lag<-max( (1:n)[rhohat^2*n>ctrunc])
	}
	if (lag>1){
 	  for (ii in 2:lag){
		utu<-utu+ t(umat[1:(n-ii+1),])%*%umat[ii:n,]
		}
	}
	utu<-utu+t(utu)
	modelv<-summary(glm.obj)$cov.unscaled
	lag0<-lag-1
	bc<-n^2/(n^2-n*(2*lag0+1)+lag0*lag0)
	df<-n^2/(n*(2*lag0+1)-lag0*lag0)
	list(var=modelv%*%utu%*%modelv,bias.correction=bc,df=df)
	}

weave.smooth<-function(glm.obj,times,  csmooth=1){
	index<-order(times)
	res<-residuals(glm.obj,"response")[index]
	n<-length(res)
	umat<-estfun.glm(glm.obj)[index,,drop=F]
	utu<-0.5*t(umat)%*%umat
	rhohat<-isocorrs(res)
	lag<-max((1:n)[rhohat^2*n>0.00001])
	w<-csmooth*n*rhohat^2
	w<-ifelse(w>1,1,w)
	wsum<-0
	w2sum<-0
	if (lag>1){
	   for (ii in 2:lag){
		utu<-utu+ w[ii]*t(umat[1:(n-ii+1),])%*%umat[ii:n,]
		wsum<-wsum+(n-ii+1)*w[ii]
		w2sum<-w2sum+(n-ii+1)*w[ii]*w[ii]
		}
	}
	wsum<-wsum*2+n
	w2sum<-w2sum*2+n
	utu<-utu+t(utu)
	modelv<-summary(glm.obj)$cov.unscaled
	bc<-n^2/(n^2-wsum)
	df<-n^2/w2sum
	list(var=modelv%*%utu%*%modelv,bias.correction=bc,df=df)
	}

infjack.glm<-function(glm.obj,groups){
	if (!exists("rowsum")) require(survival4)
	umat<-estfun.glm(glm.obj)
	usum<-rowsum(umat,groups,reorder=F)
	modelv<-summary(glm.obj)$cov.unscaled
	modelv%*%(t(usum)%*%usum)%*%modelv
	}

jack.glm<-function(glm.obj,groups){
	if (!exists("rowsum")) require(survival4)
	umat<-jackvalues(glm.obj)
	usum<-rowsum(umat,groups,reorder=F)
	t(usum)%*%usum*(nrow(umat)-1)/nrow(umat)
	}

xeffect.glm<-function(glm.obj,g1,g2){
	if (!exists("rowsum")) require(survival4)
	umat<-estfun.glm(glm.obj)
	usum1<-rowsum(umat,g1,reorder=F)
	usum2<-rowsum(umat,g2,reorder=F)
	g1a<-as.numeric(as.factor(g1))
	g2a<-as.numeric(as.factor(g2))
	g12<-g1a*(1+max(g2a))+g2a
	usum12<-rowsum(umat,g12,reorder=F)
	utu<-(t(usum1)%*%usum1)+t(usum2)%*%usum2-t(usum12)%*%usum12
	modelv<-summary(glm.obj)$cov.unscaled
	modelv%*%utu%*%modelv
}
newey.west.glm<-function(glm.obj,times, lag=NULL,clag=NULL){
	index<-order(times)
	res<-residuals(glm.obj,"response")[index]
	n<-length(res)
	if (is.null(lag)){
		if (!is.null(clag)) lag<-trunc(clag*n^(1/3))
		 else stop("lag not specified")
	}
	umat<-estfun.glm(glm.obj)[index,,drop=F]
	utu<-0.5*t(umat)%*%umat
	if (lag>1){
	  for (ii in 2:lag){
		utu<-utu+ (1-ii/lag)*t(umat[1:(n-ii+1),])%*%umat[ii:n,]
		}
	}
	utu<-utu+t(utu)
	modelv<-summary(glm.obj)$cov.unscaled
	bc<-n^2/(n^2-n*(2*lag+1)+lag*lag)
	df<-n^2/(n*(2*lag+1)-lag*lag)
	list(var=modelv%*%utu%*%modelv,bias.correction=bc,df=df)
	}


kernelvar.glm<-function(glm.obj,times, lag=NULL,clag=NULL, kernel=c("tukey","bartlett","parzen")){
	index<-order(times)
	res<-residuals(glm.obj,"response")[index]
	n<-length(res)
	if (is.null(lag)){
		if (!is.null(clag)) 
			lag<-switch(pmatch(kernel, 
					c("tukey","bartlett","parzen"), nomatch=4)[1], 
				trunc(clag*n^(1/5)), trunc(clag*n^(1/3)), 
				trunc(clag*n^(1/5)), stop("unrecognised kernel"))
		 else stop("lag not specified")
	}
	w.tukey<-function(M) {
		(1+cos((0:M)*pi/M))/2
	}
	w.bartlett<-function(M){
		(M:1)/M
	}
	w.parzen<-function(M){
		m<-trunc(M/2)
		c(1-6*((0:m)/M)^2+6*((0:m)/M)^3,2*(1-((m+1):M)/M)^3)
	}
	weights<-switch(pmatch(kernel,c("tukey","bartlett","parzen"),nomatch=4)[1],w.tukey(lag),w.bartlett(lag),w.parzen(lag),stop("unrecognised kernel")) 
	umat<-estfun.glm(glm.obj)[index,,drop=F]
	utu<-0.5*t(umat)%*%umat*weights[1]
	wsum<-n/2
	w2sum<-n/2
	if (lag>1){
	  for (ii in 2:lag){
		utu<-utu+ weights[ii]*t(umat[1:(n-ii+1),])%*%umat[ii:n,]
		wsum<-wsum+(n-ii+1)*weights[ii]
		w2sum<-w2sum+(n-ii+1)*weights[ii]^2
		}
	}
	utu<-utu+t(utu)
	wsum<-2*wsum
	w2sum<-2*w2sum
	modelv<-summary(glm.obj)$cov.unscaled
	bc<-n^2/(n^2-wsum)
	df<-n^2/w2sum
	list(var=modelv%*%utu%*%modelv,bias.correction=bc,df=df)
	}

weightvar.glm<-function(glm.obj,times,weights){
	index<-order(times)
	res<-residuals(glm.obj,"response")[index]
	n<-length(res)
	lag<-length(weights)
	umat<-estfun.glm(glm.obj)[index,,drop=F]
	utu<-0.5*t(umat)%*%umat*weights[1]
	wsum<-n*weights[1]/2
	w2sum<-n*weights[1]^2/2
	if (lag>1){
	  for (ii in 2:lag){
		utu<-utu+ weights[ii]*t(umat[1:(n-ii+1),])%*%umat[ii:n,]
		wsum<-wsum+(n-ii+1)*weights[ii]
		w2sum<-w2sum+(n-ii+1)*weights[ii]^2
		}
	}
	utu<-utu+t(utu)
	wsum<-2*wsum
	w2sum<-2*w2sum
	modelv<-summary(glm.obj)$cov.unscaled
	bc<-n^2/(n^2-wsum)
	df<-n^2/w2sum
	list(var=modelv%*%utu%*%modelv,bias.correction=bc,df=df)
	}

lele.glm<-function(glm.obj,times,lag){
	index<-order(times)
	umat<-jackvalues(glm.obj)[index,,drop=F]
	n<-nrow(umat)
	utu<-0.5*t(umat)%*%umat
	if (lag>1){
	  for (ii in 2:lag){
		utu<-utu+ t(umat[1:(n-ii+1),])%*%umat[ii:n,]
		}
	}
	utu<-utu+t(utu)
	return(utu*(n)/(n-1))
	}

jackvalues<-function(glm.obj){
	if (is.R() & as.numeric(version$minor)<50){
		db<-lm.influence(glm.obj)$coef*sqrt(glm.obj$weights)
	} else {
		db<-lm.influence(glm.obj)$coef
	}
	t(t(db)-apply(db,2,mean))
}	


estfun.glm<-function(glm.obj){
	if (is.matrix(glm.obj$x)) 
		xmat<-glm.obj$x
	else {
		mf<-model.frame(glm.obj)
		xmat<-model.matrix(terms(glm.obj),mf)		
	}
	residuals(glm.obj,"working")*glm.obj$weights*xmat
}


is.R<-function(){if (is.null(version$language)) F else version$language=="R"}

if (!is.R()){
	cov<-function(x,y) var(x,y)
	Machine<-function() .Machine
	}

acf.R<-function(x,t=1:length(x),lag=trunc(5*sqrt(length(x)))){
   xbar<-mean(x)
   x<-x[order(t)]-xbar
   n<-length(x)
   autocov<-function(ii,xx){
          cov(xx[1:(length(xx)-ii+1)],xx[ii:length(xx)])
          }
   covs<-sapply(2:lag,autocov,xx=x)
   covs/var(x)
}


 pava.blocks<-function(x,w=rep(1,length(x)),b=rep(1,length(x)),up=T){
	lasti<-1
	if (length(x)==1) return(list(x=x,blocks=b,increasing=up))
	for (i in 2:length(x)){
		if (x[i]<=x[lasti] & up){
		   wtotal<-w[lasti]+w[i]
		   x[lasti]<-(x[lasti]*w[lasti]+x[i]*w[i])/wtotal
		   w[lasti]<-wtotal
		   b[lasti]<-b[i]+b[lasti]
		   b[i]<-0
		 }else if (x[i]<=x[lasti] & !up){
		   lasti<-i
		 }else if (x[i]>x[lasti] & !up){
		   wtotal<-w[lasti]+w[i]
		   x[lasti]<-(x[lasti]*w[lasti]+x[i]*w[i])/wtotal
		   w[lasti]<-wtotal
		   b[lasti]<-b[i]+b[lasti]
		   b[i]<-0
		 }else if(x[i]>x[lasti] & up){
		   lasti<-i
		 }
		}
	 if (any(b==0)) {
		subset<-(b>0)
		return(pava.blocks(x[subset],w[subset],b[subset],up))
		}
	 else return(list(x=x,blocks=b,increasing=up))
}
	

isocorrs<-function(u,lagmax=trunc(5*sqrt(length(u)))){
	lagmax<-min(length(u)-1,lagmax)
	covs<-acf.R(u,lag=lagmax)
	isocovs<-pava.blocks(c(covs,0),c((length(u)-1):(length(u)-length(covs)),Machine()$double.xmax),up=F)
	expanded<-numeric(length(u))
	expanded[1]<-1
	starti<-2
	for (i in 1:length(isocovs$x)){
		expanded[starti:(starti+isocovs$blocks[i]-1)]<-isocovs$x[i]
		starti<-starti+isocovs$blocks[i]
		}
	expanded
	}






